Set Working Directory
# setwd("/home/yourname/NYCTaxiData")
Load data
# Read the sample CSV into a local data frame and report its dimensions.
mydata <- read.csv(file = "sample.csv")
dim(mydata)
## [1] 100000 21
Install the tmap package (a library for thematic maps) and the other required R packages
#install.packages("dplyr")
#install.packages("sf")
#install.packages("curl")
#Restart your R Session
#install.packages("tmap")
Install ggmap
#install.packages("ggmap")
#OR (choose whichever works on your computer)
#install.packages("devtools")
#devtools::install_github("dkahle/ggmap")
Load libraries
library(dplyr)
library(sf)
library(curl)
library(ggmap)
library(tmap)
library(tmaptools)
Download and view map
# Geocode "Manhattan" through geocode_OSM(), flatten the returned bounding box
# into a one-row numeric matrix, and download the matching basemap at zoom 11.
# NOTE(review): newer ggmap releases appear to serve these tiles through
# get_stadiamap() with an API key -- confirm against the installed version.
map <- get_stamenmap(
  bbox = rbind(as.numeric(paste(geocode_OSM("Manhattan")$bbox))),
  zoom = 11
)

# Show the basemap on its own.
ggmap(map)
Plot pickup locations
# Overlay every pickup as a tiny, semi-transparent white point on the basemap.
ggmap(map) +
  geom_point(
    data = mydata,
    mapping = aes(x = pickup_longitude, y = pickup_latitude),
    colour = "white",
    size = 0.01,
    alpha = 0.5
  )
Square binning
# Bin the pickups into rectangular cells (bins = 100) and draw the per-cell
# counts on top of the basemap.
plotmap <- ggmap(map) +
  geom_bin2d(
    mapping = aes(x = pickup_longitude, y = pickup_latitude),
    data = mydata,
    bins = 100
  )

# Display the binned map.
plotmap
Install HexBin
#install.packages("hexbin")
Plot HexBin
# Same idea as the square binning, but with hexagonal cells (bins = 100).
# coord_cartesian() is added before the hex layer -- presumably because
# geom_hex() needs plain Cartesian coordinates; confirm before removing it.
plotmapHB <- ggmap(map) +
  coord_cartesian() +
  geom_hex(
    mapping = aes(x = pickup_longitude, y = pickup_latitude),
    data = mydata,
    bins = 100
  )

# Display the hex-binned map.
plotmapHB
Load Data into SparklyR
library(sparklyr)
library(dplyr)
# Open a local Spark connection and load the sample CSV into it under the
# table name "taxi_data".
sc <- spark_connect(master = "local")
nyc_taxi <- spark_read_csv(
  sc,
  name = "taxi_data",
  path = "sample.csv",
  header = TRUE,
  delimiter = ","
)
Manual square binning by rounding
# Round both pickup coordinates to 3 decimal places so that nearby pickups
# share the same (latitude, longitude) pair, then register the result in
# Spark under the name "nyc_taxi".
nyc_taxi <- nyc_taxi %>%
  mutate(
    pickup_latitude = round(pickup_latitude, 3),
    pickup_longitude = round(pickup_longitude, 3)
  ) %>%
  sdf_register("nyc_taxi")
Saving data
# Write the rounded table out as CSV under "rounded", replacing any previous
# output at that path.
spark_write_csv(
  nyc_taxi,
  path = "rounded",
  header = TRUE,
  delimiter = ",",
  mode = "overwrite"
)
Computing the data summary and saving it
# Count the pickups for each rounded (latitude, longitude) pair and register
# the counts in Spark under the name "nyc_taxi_summary".
nyc_taxi_summary <- nyc_taxi %>%
  group_by(pickup_latitude, pickup_longitude) %>%
  summarise(n = n()) %>%
  sdf_register("nyc_taxi_summary")

# Write the per-cell counts out as CSV under "summary", replacing any
# previous output at that path.
spark_write_csv(
  nyc_taxi_summary,
  path = "summary",
  header = TRUE,
  delimiter = ",",
  mode = "overwrite"
)
Plotting the summary and saving the plot
# Draw one small filled square (shape 22) per rounded coordinate pair on the
# basemap, with outline and fill both mapped to the pickup count `n`.
# The summary is a Spark table, so it is collected into a local data frame
# explicitly instead of relying on ggplot2 to pull a remote table implicitly.
plotmap <- ggmap(map) +
  geom_point(
    mapping = aes(
      x = pickup_longitude,
      y = pickup_latitude,
      colour = n,
      fill = n
    ),
    data = collect(nyc_taxi_summary),
    shape = 22,
    size = 0.25
  )

# Save this plot by name: without `plot =`, ggsave() falls back to the global
# last_plot() state, which silently changes if another plot is built between
# the assignment above and this call.
ggsave("plot.png", plot = plotmap)